In [105]:
# Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree, DecisionTreeRegressor
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score
from sklearn.metrics import classification_report, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold, KFold
In [107]:
# Loading data from Google Drive
data_path = '/Users/sowjanyapadala/Desktop/Coursework/Q3/DATA_5322_Statistical_Machine_Learning2/Final_Project/Spotify_project/Dataset/SpotifyFeatures.csv'
spotify_df = pd.read_csv(data_path)
In [109]:
# Clean duplicated Tracks
spotify_df= spotify_df.drop_duplicates(subset=['track_id'])
count = spotify_df['track_id'].value_counts()
num_duplicated_ids = (count > 1).sum()
print("Number of unique tracks that are duplicated after cleaning:", num_duplicated_ids)
Number of unique tracks that are duplicated after cleaning: 0
In [111]:
# Strip Leading/Trailing Whitespace and Fix Case
spotify_df['genre'] = spotify_df['genre'].str.replace('’', "'", regex=False)
spotify_df['genre'].unique()
Out[111]:
array(['Movie', 'R&B', 'A Capella', 'Alternative', 'Country', 'Dance',
       'Electronic', 'Anime', 'Folk', 'Blues', 'Opera', 'Hip-Hop',
       "Children's Music", 'Rap', 'Indie', 'Classical', 'Pop', 'Reggae',
       'Reggaeton', 'Jazz', 'Rock', 'Ska', 'Comedy', 'Soul', 'Soundtrack',
       'World'], dtype=object)
In [113]:
# Mapping similar genre
genre_mapping = {
    'Pop': 'Pop/Rock',
    'Rock': 'Pop/Rock',
    'Indie': 'Pop/Rock',
    'Alternative': 'Pop/Rock',
    'Soul': 'Pop/Rock',
    'Hip-Hop': 'Hip-Hop/Rap/R&B',
    'Rap': 'Hip-Hop/Rap/R&B',
    'R&B': 'Hip-Hop/Rap/R&B',
    'Dance': 'Dance/Electronic',
    'Electronic': 'Dance/Electronic',
    'Reggaeton': 'Dance/Electronic',
    'Reggae': 'Dance/Electronic',
    'Ska': 'Dance/Electronic',
    'Jazz': 'Jazz/Blues',
    'Blues': 'Jazz/Blues',
    'Classical': 'Classical/Opera',
    'Opera': 'Classical/Opera',
    'Country': 'Country/Folk',
    'Folk': 'Country/Folk',
    'World': 'World/Soundtrack',
    'Soundtrack': 'World/Soundtrack',
    'Movie': 'Movie/Comedy',
    'Comedy': 'Movie/Comedy',
    'Children\'s Music': 'Children/Anime',
    'Anime': 'Children/Anime',
    'A Capella': 'Pop/Rock'
}
spotify_df['genre_grouped'] = spotify_df['genre'].map(genre_mapping)
In [115]:
print(spotify_df['genre_grouped'].value_counts())
genre_grouped
Dance/Electronic    42384
Pop/Rock            21606
Movie/Comedy        17476
Classical/Opera     16991
Jazz/Blues          16535
World/Soundtrack    16453
Children/Anime      15676
Country/Folk        15431
Hip-Hop/Rap/R&B     14222
Name: count, dtype: int64
In [117]:
# Log Transform
spotify_df['duration_ms'] = np.log1p(spotify_df['duration_ms'])
spotify_df['instrumentalness'] = np.log1p(spotify_df['instrumentalness'])
spotify_df['speechiness'] = np.log1p(spotify_df['speechiness'])

# Encode Categorical Data
for col in ['key', 'mode', 'time_signature']:
    le = LabelEncoder()
    spotify_df[col] = le.fit_transform(spotify_df[col])



numeric_features = [
    'popularity', 'acousticness', 'danceability', 'duration_ms', 'energy',
    'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence'
]
In [119]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
spotify_df[numeric_features] = scaler.fit_transform(spotify_df[numeric_features])
In [121]:
spotify_df[numeric_features].head(5)
Out[121]:
popularity acousticness danceability duration_ms energy instrumentalness liveness loudness speechiness tempo valence
0 -2.085747 0.564740 -0.798733 -1.752675 1.279646 -0.547439 0.575611 1.299283 -0.384671 1.588677 1.353170
1 -2.028246 -0.431708 0.257016 -1.015026 0.652467 -0.547439 -0.348444 0.715907 -0.165891 1.813226 1.360637
2 -1.913244 1.495668 0.640447 -0.526019 -1.544471 -0.547439 -0.575904 -0.585001 -0.491152 -0.565544 -0.312134
3 -2.085747 0.815900 -1.581352 -0.778149 -0.837536 -0.547439 -0.597228 -0.319034 -0.469460 1.741558 -0.838608
4 -1.855743 1.490208 -1.103376 -2.173113 -1.203692 -0.051906 -0.106768 -1.721889 -0.429543 0.746124 -0.229989
In [123]:
import seaborn as sns
import matplotlib.pyplot as plt
correlation_matrix = spotify_df[numeric_features].corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap of Numeric Features')
plt.show()
No description has been provided for this image
In [125]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import time
In [127]:
X = spotify_df[numeric_features].values
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(spotify_df['genre_grouped'])
y_categorical = to_categorical(y_encoded)
In [129]:
for i, label in enumerate(label_encoder.classes_):
    print(f"{i}: {label}")
0: Children/Anime
1: Classical/Opera
2: Country/Folk
3: Dance/Electronic
4: Hip-Hop/Rap/R&B
5: Jazz/Blues
6: Movie/Comedy
7: Pop/Rock
8: World/Soundtrack
In [131]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y_encoded
)
In [141]:
# Get the number of features and classes
input_shape = X_train.shape[1]
num_classes = y_train.shape[1]

# Build the model
model1_nn = Sequential()
model1_nn.add(Dense(256, activation='relu', input_shape=(input_shape,)))
model1_nn.add(BatchNormalization())
model1_nn.add(Dropout(0.3))

model1_nn.add(Dense(128, activation='relu'))
model1_nn.add(BatchNormalization())
model1_nn.add(Dropout(0.3))

model1_nn.add(Dense(64, activation='relu'))
model1_nn.add(BatchNormalization())
model1_nn.add(Dropout(0.3))

model1_nn.add(Dense(num_classes, activation='softmax'))

# Compile the model
model1_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
In [16]:
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True
)


start_time = time.time()  # Record the start time

history1 = model1_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)

end_time = time.time()  # Record the end time

total_time = end_time - start_time
print(f"Training completed in {total_time:.2f} seconds.")
Epoch 1/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 24s 5ms/step - Precision: 0.6331 - Recall: 0.3138 - accuracy: 0.4883 - loss: 1.4099 - val_Precision: 0.7422 - val_Recall: 0.4143 - val_accuracy: 0.5962 - val_loss: 1.0854
Epoch 2/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7056 - Recall: 0.3804 - accuracy: 0.5619 - loss: 1.1847 - val_Precision: 0.7359 - val_Recall: 0.4472 - val_accuracy: 0.6070 - val_loss: 1.0554
Epoch 3/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7125 - Recall: 0.4020 - accuracy: 0.5771 - loss: 1.1487 - val_Precision: 0.7302 - val_Recall: 0.4505 - val_accuracy: 0.6078 - val_loss: 1.0506
Epoch 4/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7192 - Recall: 0.4118 - accuracy: 0.5814 - loss: 1.1320 - val_Precision: 0.7451 - val_Recall: 0.4522 - val_accuracy: 0.6117 - val_loss: 1.0384
Epoch 5/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7225 - Recall: 0.4176 - accuracy: 0.5834 - loss: 1.1238 - val_Precision: 0.7498 - val_Recall: 0.4492 - val_accuracy: 0.6136 - val_loss: 1.0322
Epoch 6/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7226 - Recall: 0.4198 - accuracy: 0.5840 - loss: 1.1196 - val_Precision: 0.7494 - val_Recall: 0.4572 - val_accuracy: 0.6165 - val_loss: 1.0303
Epoch 7/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 38s 5ms/step - Precision: 0.7234 - Recall: 0.4221 - accuracy: 0.5880 - loss: 1.1144 - val_Precision: 0.7473 - val_Recall: 0.4606 - val_accuracy: 0.6149 - val_loss: 1.0244
Epoch 8/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7264 - Recall: 0.4241 - accuracy: 0.5892 - loss: 1.1089 - val_Precision: 0.7426 - val_Recall: 0.4723 - val_accuracy: 0.6190 - val_loss: 1.0174
Epoch 9/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7240 - Recall: 0.4269 - accuracy: 0.5897 - loss: 1.1083 - val_Precision: 0.7530 - val_Recall: 0.4627 - val_accuracy: 0.6197 - val_loss: 1.0163
Epoch 10/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7273 - Recall: 0.4299 - accuracy: 0.5942 - loss: 1.1024 - val_Precision: 0.7438 - val_Recall: 0.4690 - val_accuracy: 0.6196 - val_loss: 1.0169
Epoch 11/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7259 - Recall: 0.4296 - accuracy: 0.5940 - loss: 1.1013 - val_Precision: 0.7603 - val_Recall: 0.4545 - val_accuracy: 0.6226 - val_loss: 1.0119
Epoch 12/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7290 - Recall: 0.4321 - accuracy: 0.5941 - loss: 1.0953 - val_Precision: 0.7568 - val_Recall: 0.4590 - val_accuracy: 0.6215 - val_loss: 1.0138
Epoch 13/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7313 - Recall: 0.4341 - accuracy: 0.5969 - loss: 1.0949 - val_Precision: 0.7527 - val_Recall: 0.4664 - val_accuracy: 0.6200 - val_loss: 1.0105
Epoch 14/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7285 - Recall: 0.4340 - accuracy: 0.5966 - loss: 1.0926 - val_Precision: 0.7602 - val_Recall: 0.4586 - val_accuracy: 0.6229 - val_loss: 1.0067
Epoch 15/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7296 - Recall: 0.4355 - accuracy: 0.5961 - loss: 1.0886 - val_Precision: 0.7468 - val_Recall: 0.4794 - val_accuracy: 0.6227 - val_loss: 1.0025
Epoch 16/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7285 - Recall: 0.4357 - accuracy: 0.5954 - loss: 1.0894 - val_Precision: 0.7572 - val_Recall: 0.4703 - val_accuracy: 0.6257 - val_loss: 1.0047
Epoch 17/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7297 - Recall: 0.4353 - accuracy: 0.5965 - loss: 1.0886 - val_Precision: 0.7542 - val_Recall: 0.4689 - val_accuracy: 0.6225 - val_loss: 1.0053
Epoch 18/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7299 - Recall: 0.4353 - accuracy: 0.5959 - loss: 1.0881 - val_Precision: 0.7579 - val_Recall: 0.4643 - val_accuracy: 0.6246 - val_loss: 1.0015
Epoch 19/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7310 - Recall: 0.4378 - accuracy: 0.5987 - loss: 1.0829 - val_Precision: 0.7601 - val_Recall: 0.4681 - val_accuracy: 0.6251 - val_loss: 1.0040
Epoch 20/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7332 - Recall: 0.4420 - accuracy: 0.6010 - loss: 1.0768 - val_Precision: 0.7544 - val_Recall: 0.4662 - val_accuracy: 0.6239 - val_loss: 1.0017
Epoch 21/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 37s 4ms/step - Precision: 0.7327 - Recall: 0.4382 - accuracy: 0.5991 - loss: 1.0828 - val_Precision: 0.7435 - val_Recall: 0.4897 - val_accuracy: 0.6257 - val_loss: 0.9985
Epoch 22/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7327 - Recall: 0.4413 - accuracy: 0.6002 - loss: 1.0806 - val_Precision: 0.7549 - val_Recall: 0.4727 - val_accuracy: 0.6267 - val_loss: 0.9998
Epoch 23/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7330 - Recall: 0.4411 - accuracy: 0.6012 - loss: 1.0796 - val_Precision: 0.7477 - val_Recall: 0.4770 - val_accuracy: 0.6251 - val_loss: 1.0023
Epoch 24/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7310 - Recall: 0.4428 - accuracy: 0.6019 - loss: 1.0766 - val_Precision: 0.7505 - val_Recall: 0.4798 - val_accuracy: 0.6258 - val_loss: 0.9989
Epoch 25/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7330 - Recall: 0.4435 - accuracy: 0.6023 - loss: 1.0748 - val_Precision: 0.7506 - val_Recall: 0.4809 - val_accuracy: 0.6261 - val_loss: 0.9963
Epoch 26/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 4ms/step - Precision: 0.7351 - Recall: 0.4450 - accuracy: 0.6013 - loss: 1.0767 - val_Precision: 0.7619 - val_Recall: 0.4640 - val_accuracy: 0.6259 - val_loss: 0.9993
Epoch 27/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7322 - Recall: 0.4403 - accuracy: 0.6018 - loss: 1.0786 - val_Precision: 0.7529 - val_Recall: 0.4816 - val_accuracy: 0.6272 - val_loss: 0.9965
Epoch 28/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7343 - Recall: 0.4472 - accuracy: 0.6041 - loss: 1.0695 - val_Precision: 0.7568 - val_Recall: 0.4760 - val_accuracy: 0.6279 - val_loss: 0.9920
Epoch 29/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7342 - Recall: 0.4426 - accuracy: 0.6023 - loss: 1.0747 - val_Precision: 0.7558 - val_Recall: 0.4741 - val_accuracy: 0.6275 - val_loss: 0.9961
Epoch 30/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7361 - Recall: 0.4439 - accuracy: 0.6036 - loss: 1.0709 - val_Precision: 0.7550 - val_Recall: 0.4772 - val_accuracy: 0.6279 - val_loss: 0.9951
Epoch 31/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 39s 5ms/step - Precision: 0.7334 - Recall: 0.4436 - accuracy: 0.6026 - loss: 1.0757 - val_Precision: 0.7669 - val_Recall: 0.4666 - val_accuracy: 0.6303 - val_loss: 0.9926
Epoch 32/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7353 - Recall: 0.4450 - accuracy: 0.6039 - loss: 1.0681 - val_Precision: 0.7587 - val_Recall: 0.4755 - val_accuracy: 0.6290 - val_loss: 0.9920
Epoch 33/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7343 - Recall: 0.4410 - accuracy: 0.5999 - loss: 1.0735 - val_Precision: 0.7532 - val_Recall: 0.4839 - val_accuracy: 0.6298 - val_loss: 0.9906
Epoch 34/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7328 - Recall: 0.4442 - accuracy: 0.6026 - loss: 1.0748 - val_Precision: 0.7605 - val_Recall: 0.4696 - val_accuracy: 0.6288 - val_loss: 0.9924
Epoch 35/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7340 - Recall: 0.4429 - accuracy: 0.6030 - loss: 1.0733 - val_Precision: 0.7560 - val_Recall: 0.4811 - val_accuracy: 0.6316 - val_loss: 0.9906
Epoch 36/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7356 - Recall: 0.4495 - accuracy: 0.6058 - loss: 1.0666 - val_Precision: 0.7615 - val_Recall: 0.4743 - val_accuracy: 0.6285 - val_loss: 0.9893
Epoch 37/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7363 - Recall: 0.4456 - accuracy: 0.6040 - loss: 1.0696 - val_Precision: 0.7504 - val_Recall: 0.4838 - val_accuracy: 0.6257 - val_loss: 0.9944
Epoch 38/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7359 - Recall: 0.4485 - accuracy: 0.6030 - loss: 1.0690 - val_Precision: 0.7640 - val_Recall: 0.4712 - val_accuracy: 0.6298 - val_loss: 0.9888
Epoch 39/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7343 - Recall: 0.4403 - accuracy: 0.6014 - loss: 1.0758 - val_Precision: 0.7618 - val_Recall: 0.4736 - val_accuracy: 0.6287 - val_loss: 0.9893
Epoch 40/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7375 - Recall: 0.4462 - accuracy: 0.6042 - loss: 1.0709 - val_Precision: 0.7523 - val_Recall: 0.4808 - val_accuracy: 0.6277 - val_loss: 0.9908
Epoch 41/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7354 - Recall: 0.4469 - accuracy: 0.6041 - loss: 1.0674 - val_Precision: 0.7585 - val_Recall: 0.4769 - val_accuracy: 0.6302 - val_loss: 0.9901
Epoch 42/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7375 - Recall: 0.4503 - accuracy: 0.6065 - loss: 1.0643 - val_Precision: 0.7648 - val_Recall: 0.4688 - val_accuracy: 0.6297 - val_loss: 0.9888
Epoch 43/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7372 - Recall: 0.4441 - accuracy: 0.6014 - loss: 1.0726 - val_Precision: 0.7587 - val_Recall: 0.4789 - val_accuracy: 0.6302 - val_loss: 0.9901
Epoch 44/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7362 - Recall: 0.4492 - accuracy: 0.6052 - loss: 1.0663 - val_Precision: 0.7554 - val_Recall: 0.4776 - val_accuracy: 0.6285 - val_loss: 0.9899
Epoch 45/100
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7377 - Recall: 0.4496 - accuracy: 0.6067 - loss: 1.0625 - val_Precision: 0.7662 - val_Recall: 0.4711 - val_accuracy: 0.6312 - val_loss: 0.9867
Training completed in 1231.25 seconds.
In [17]:
results = model1_nn.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {results[1]:.4f}")
print(f"Test Precision: {results[2]:.4f}")
print(f"Test Recall: {results[3]:.4f}")
Test Accuracy: 0.6316
Test Precision: 0.7560
Test Recall: 0.4811
In [18]:
import matplotlib.pyplot as plt

plt.figure(figsize=(20, 5))

# Precision Plot
plt.subplot(1, 4, 1)
plt.plot(history1.history['Precision'], label='Train Precision')
plt.plot(history1.history['val_Precision'], label='Validation Precision')
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.title('Precision over Epochs')
plt.legend()

# Recall Plot
plt.subplot(1, 4, 2)
plt.plot(history1.history['Recall'], label='Train Recall')
plt.plot(history1.history['val_Recall'], label='Validation Recall')
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.title('Recall over Epochs')
plt.legend()

# Accuracy Plot
plt.subplot(1, 4, 3)
plt.plot(history1.history['accuracy'], label='Train Accuracy')
plt.plot(history1.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

# Loss Plot
plt.subplot(1, 4, 4)
plt.plot(history1.history['loss'], label='Train Loss')
plt.plot(history1.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.tight_layout()
plt.show()
No description has been provided for this image
In [19]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict on the test set
y_pred_probs1 = model1_nn.predict(X_test)

# Convert one-hot predictions to class labels
y_pred1 = np.argmax(y_pred_probs1, axis=1)

# Convert one-hot true labels to class labels
y_true1 = np.argmax(y_test, axis=1)
cm1 = confusion_matrix(y_true1, y_pred1)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm1, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
1105/1105 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step
No description has been provided for this image

Performance Metrics (Test Set):
Accuracy: 63.16%
Precision: 75.60%
Recall: 48.11%
These results indicate that the model is effective in predicting genre classes with high precision which means that out of all the tracks the model predicted that the track belongs to particular genre approximately 75.6% were actually correct. However, the lower recall suggests that it misses several true positive cases.
The training/validation accuracy and precision curves shows a consistent upward trend, with validation outperforming training slightly which indicates good generalization.

In [20]:
# Build the model with 2 hidden layers
model2_nn = Sequential()

# First hidden layer
model2_nn.add(Dense(256, activation='relu', input_shape=(input_shape,)))
model2_nn.add(BatchNormalization())
model2_nn.add(Dropout(0.3))

# Second hidden layer
model2_nn.add(Dense(128, activation='relu'))
model2_nn.add(BatchNormalization())
model2_nn.add(Dropout(0.3))

# Output layer
model2_nn.add(Dense(num_classes, activation='softmax'))

# Compile the model
model2_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
In [21]:
start_time = time.time()
history2 = model2_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=45,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)
end_time = time.time()  # Record the end time

total_time = end_time - start_time
print(f"Training completed in {total_time:.2f} seconds.")
Epoch 1/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.6358 - Recall: 0.3529 - accuracy: 0.5106 - loss: 1.3504 - val_Precision: 0.7355 - val_Recall: 0.4145 - val_accuracy: 0.5959 - val_loss: 1.0827
Epoch 2/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7095 - Recall: 0.4040 - accuracy: 0.5719 - loss: 1.1471 - val_Precision: 0.7391 - val_Recall: 0.4484 - val_accuracy: 0.6100 - val_loss: 1.0467
Epoch 3/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7153 - Recall: 0.4179 - accuracy: 0.5831 - loss: 1.1179 - val_Precision: 0.7455 - val_Recall: 0.4520 - val_accuracy: 0.6140 - val_loss: 1.0371
Epoch 4/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7208 - Recall: 0.4210 - accuracy: 0.5841 - loss: 1.1116 - val_Precision: 0.7498 - val_Recall: 0.4546 - val_accuracy: 0.6182 - val_loss: 1.0263
Epoch 5/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7215 - Recall: 0.4302 - accuracy: 0.5922 - loss: 1.0939 - val_Precision: 0.7499 - val_Recall: 0.4538 - val_accuracy: 0.6164 - val_loss: 1.0262
Epoch 6/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7240 - Recall: 0.4331 - accuracy: 0.5913 - loss: 1.0917 - val_Precision: 0.7505 - val_Recall: 0.4467 - val_accuracy: 0.6158 - val_loss: 1.0236
Epoch 7/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7286 - Recall: 0.4335 - accuracy: 0.5952 - loss: 1.0853 - val_Precision: 0.7394 - val_Recall: 0.4686 - val_accuracy: 0.6162 - val_loss: 1.0200
Epoch 8/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7299 - Recall: 0.4386 - accuracy: 0.5974 - loss: 1.0790 - val_Precision: 0.7491 - val_Recall: 0.4686 - val_accuracy: 0.6201 - val_loss: 1.0132
Epoch 9/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7291 - Recall: 0.4372 - accuracy: 0.5973 - loss: 1.0776 - val_Precision: 0.7496 - val_Recall: 0.4668 - val_accuracy: 0.6223 - val_loss: 1.0114
Epoch 10/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7295 - Recall: 0.4399 - accuracy: 0.5990 - loss: 1.0777 - val_Precision: 0.7503 - val_Recall: 0.4685 - val_accuracy: 0.6241 - val_loss: 1.0074
Epoch 11/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7324 - Recall: 0.4481 - accuracy: 0.6030 - loss: 1.0665 - val_Precision: 0.7545 - val_Recall: 0.4632 - val_accuracy: 0.6212 - val_loss: 1.0086
Epoch 12/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7312 - Recall: 0.4417 - accuracy: 0.5995 - loss: 1.0686 - val_Precision: 0.7439 - val_Recall: 0.4778 - val_accuracy: 0.6226 - val_loss: 1.0074
Epoch 13/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7335 - Recall: 0.4477 - accuracy: 0.6030 - loss: 1.0644 - val_Precision: 0.7584 - val_Recall: 0.4695 - val_accuracy: 0.6280 - val_loss: 0.9980
Epoch 14/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 42s 5ms/step - Precision: 0.7309 - Recall: 0.4444 - accuracy: 0.6007 - loss: 1.0663 - val_Precision: 0.7522 - val_Recall: 0.4723 - val_accuracy: 0.6259 - val_loss: 1.0003
Epoch 15/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7333 - Recall: 0.4482 - accuracy: 0.6025 - loss: 1.0613 - val_Precision: 0.7589 - val_Recall: 0.4685 - val_accuracy: 0.6272 - val_loss: 0.9981
Epoch 16/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7362 - Recall: 0.4491 - accuracy: 0.6042 - loss: 1.0572 - val_Precision: 0.7550 - val_Recall: 0.4750 - val_accuracy: 0.6282 - val_loss: 0.9950
Epoch 17/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7342 - Recall: 0.4483 - accuracy: 0.6040 - loss: 1.0596 - val_Precision: 0.7522 - val_Recall: 0.4712 - val_accuracy: 0.6250 - val_loss: 0.9980
Epoch 18/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7326 - Recall: 0.4479 - accuracy: 0.6050 - loss: 1.0600 - val_Precision: 0.7513 - val_Recall: 0.4728 - val_accuracy: 0.6271 - val_loss: 1.0008
Epoch 19/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7372 - Recall: 0.4517 - accuracy: 0.6064 - loss: 1.0580 - val_Precision: 0.7483 - val_Recall: 0.4831 - val_accuracy: 0.6265 - val_loss: 0.9973
Epoch 20/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7343 - Recall: 0.4520 - accuracy: 0.6074 - loss: 1.0580 - val_Precision: 0.7575 - val_Recall: 0.4711 - val_accuracy: 0.6281 - val_loss: 0.9928
Epoch 21/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7370 - Recall: 0.4501 - accuracy: 0.6062 - loss: 1.0549 - val_Precision: 0.7530 - val_Recall: 0.4793 - val_accuracy: 0.6287 - val_loss: 0.9931
Epoch 22/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7363 - Recall: 0.4492 - accuracy: 0.6048 - loss: 1.0573 - val_Precision: 0.7524 - val_Recall: 0.4798 - val_accuracy: 0.6279 - val_loss: 0.9926
Epoch 23/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7373 - Recall: 0.4516 - accuracy: 0.6060 - loss: 1.0517 - val_Precision: 0.7629 - val_Recall: 0.4654 - val_accuracy: 0.6289 - val_loss: 0.9937
Epoch 24/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7370 - Recall: 0.4481 - accuracy: 0.6055 - loss: 1.0533 - val_Precision: 0.7596 - val_Recall: 0.4727 - val_accuracy: 0.6307 - val_loss: 0.9900
Epoch 25/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7360 - Recall: 0.4526 - accuracy: 0.6069 - loss: 1.0506 - val_Precision: 0.7585 - val_Recall: 0.4755 - val_accuracy: 0.6292 - val_loss: 0.9896
Epoch 26/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7364 - Recall: 0.4527 - accuracy: 0.6070 - loss: 1.0499 - val_Precision: 0.7639 - val_Recall: 0.4716 - val_accuracy: 0.6306 - val_loss: 0.9900
Epoch 27/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7359 - Recall: 0.4532 - accuracy: 0.6058 - loss: 1.0477 - val_Precision: 0.7585 - val_Recall: 0.4746 - val_accuracy: 0.6285 - val_loss: 0.9926
Epoch 28/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7377 - Recall: 0.4533 - accuracy: 0.6086 - loss: 1.0504 - val_Precision: 0.7614 - val_Recall: 0.4701 - val_accuracy: 0.6324 - val_loss: 0.9880
Epoch 29/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7381 - Recall: 0.4551 - accuracy: 0.6093 - loss: 1.0482 - val_Precision: 0.7598 - val_Recall: 0.4768 - val_accuracy: 0.6337 - val_loss: 0.9851
Epoch 30/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7373 - Recall: 0.4578 - accuracy: 0.6113 - loss: 1.0428 - val_Precision: 0.7664 - val_Recall: 0.4643 - val_accuracy: 0.6289 - val_loss: 0.9931
Epoch 31/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7384 - Recall: 0.4570 - accuracy: 0.6087 - loss: 1.0458 - val_Precision: 0.7604 - val_Recall: 0.4691 - val_accuracy: 0.6314 - val_loss: 0.9890
Epoch 32/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7397 - Recall: 0.4573 - accuracy: 0.6102 - loss: 1.0447 - val_Precision: 0.7583 - val_Recall: 0.4751 - val_accuracy: 0.6310 - val_loss: 0.9879
Epoch 33/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7395 - Recall: 0.4536 - accuracy: 0.6077 - loss: 1.0498 - val_Precision: 0.7583 - val_Recall: 0.4776 - val_accuracy: 0.6289 - val_loss: 0.9890
Epoch 34/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7358 - Recall: 0.4524 - accuracy: 0.6067 - loss: 1.0514 - val_Precision: 0.7623 - val_Recall: 0.4739 - val_accuracy: 0.6307 - val_loss: 0.9874
Epoch 35/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7387 - Recall: 0.4569 - accuracy: 0.6078 - loss: 1.0476 - val_Precision: 0.7607 - val_Recall: 0.4752 - val_accuracy: 0.6342 - val_loss: 0.9851
Epoch 36/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7370 - Recall: 0.4553 - accuracy: 0.6070 - loss: 1.0464 - val_Precision: 0.7579 - val_Recall: 0.4741 - val_accuracy: 0.6314 - val_loss: 0.9888
Epoch 37/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7359 - Recall: 0.4540 - accuracy: 0.6084 - loss: 1.0479 - val_Precision: 0.7585 - val_Recall: 0.4819 - val_accuracy: 0.6328 - val_loss: 0.9848
Epoch 38/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7374 - Recall: 0.4561 - accuracy: 0.6091 - loss: 1.0466 - val_Precision: 0.7546 - val_Recall: 0.4885 - val_accuracy: 0.6323 - val_loss: 0.9835
Epoch 39/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7355 - Recall: 0.4569 - accuracy: 0.6091 - loss: 1.0465 - val_Precision: 0.7641 - val_Recall: 0.4739 - val_accuracy: 0.6326 - val_loss: 0.9851
Epoch 40/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7398 - Recall: 0.4582 - accuracy: 0.6097 - loss: 1.0469 - val_Precision: 0.7576 - val_Recall: 0.4757 - val_accuracy: 0.6301 - val_loss: 0.9891
Epoch 41/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7373 - Recall: 0.4541 - accuracy: 0.6094 - loss: 1.0451 - val_Precision: 0.7535 - val_Recall: 0.4887 - val_accuracy: 0.6337 - val_loss: 0.9818
Epoch 42/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7392 - Recall: 0.4618 - accuracy: 0.6119 - loss: 1.0390 - val_Precision: 0.7588 - val_Recall: 0.4756 - val_accuracy: 0.6307 - val_loss: 0.9892
Epoch 43/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7398 - Recall: 0.4569 - accuracy: 0.6107 - loss: 1.0437 - val_Precision: 0.7602 - val_Recall: 0.4782 - val_accuracy: 0.6324 - val_loss: 0.9843
Epoch 44/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7407 - Recall: 0.4583 - accuracy: 0.6101 - loss: 1.0452 - val_Precision: 0.7604 - val_Recall: 0.4777 - val_accuracy: 0.6339 - val_loss: 0.9853
Epoch 45/45
4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7374 - Recall: 0.4549 - accuracy: 0.6092 - loss: 1.0463 - val_Precision: 0.7607 - val_Recall: 0.4810 - val_accuracy: 0.6336 - val_loss: 0.9829
Training completed in 942.36 seconds.
In [22]:
results = model2_nn.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {results[1]:.4f}")
print(f"Test Precision: {results[2]:.4f}")
print(f"Test Recall: {results[3]:.4f}")
Test Accuracy: 0.6342
Test Precision: 0.7607
Test Recall: 0.4752
In [23]:
import matplotlib.pyplot as plt

plt.figure(figsize=(20, 5))

# Precision Plot
plt.subplot(1, 4, 1)
plt.plot(history2.history['Precision'], label='Train Precision')
plt.plot(history2.history['val_Precision'], label='Validation Precision')
plt.xlabel('Epoch')
plt.ylabel('Precision')
plt.title('Precision over Epochs')
plt.legend()

# Recall Plot
plt.subplot(1, 4, 2)
plt.plot(history2.history['Recall'], label='Train Recall')
plt.plot(history2.history['val_Recall'], label='Validation Recall')
plt.xlabel('Epoch')
plt.ylabel('Recall')
plt.title('Recall over Epochs')
plt.legend()

# Accuracy Plot
plt.subplot(1, 4, 3)
plt.plot(history2.history['accuracy'], label='Train Accuracy')
plt.plot(history2.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy over Epochs')
plt.legend()

# Loss Plot
plt.subplot(1, 4, 4)
plt.plot(history2.history['loss'], label='Train Loss')
plt.plot(history2.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.tight_layout()
plt.show()
No description has been provided for this image
In [24]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict on the test set
y_pred_probs2 = model2_nn.predict(X_test)

# Convert one-hot predictions to class labels
y_pred2 = np.argmax(y_pred_probs2, axis=1)

# Convert one-hot true labels to class labels
y_true2= np.argmax(y_test, axis=1)
cm2 = confusion_matrix(y_true2, y_pred2)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm2, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
1105/1105 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step
No description has been provided for this image

Performance Summary (Test Set):
Accuracy: 63.42%
Precision: 76.07%
Recall: 47.52%
The values when compared to model1, shows not much noticeable improvement in the accuracy, precision and recall is marginally lower compared to model1. This indicates that reducing the number of hidden layers from three to two in Model 2 did not lead to a significant performance gain. The minimal difference suggests that the third hidden layer in Model 1 may not have contributed substantial complexity or learning capacity to justify the additional computational cost. Model 2 thus achieves comparable performance with a simpler architecture, making it a more efficient alternative.

In [33]:
import pandas as pd
from sklearn.utils import shuffle

# Combine X and y into a DataFrame
data_combined = pd.concat([pd.DataFrame(X), pd.Series(y_encoded, name='label')], axis=1)

# Find the minimum class count
min_count = data_combined['label'].value_counts().min()

# Downsample each class
downsampled = data_combined.groupby('label').apply(lambda x: x.sample(min_count, random_state=42)).reset_index(drop=True)

# Separate features and labels
X_downsampled = downsampled.drop('label', axis=1).values
y_downsampled = downsampled['label'].values

# Encode labels to categorical
y_downsampled_categorical = to_categorical(y_downsampled)

# Split into train/test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_downsampled, y_downsampled_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_downsampled
)
<ipython-input-33-761a065d90de>:11: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
  downsampled = data_combined.groupby('label').apply(lambda x: x.sample(min_count, random_state=42)).reset_index(drop=True)
In [39]:
# Get the number of features and classes
input_shape = X_train.shape[1]
num_classes = y_train.shape[1]
# Build the model with 2 hidden layers
model4_nn = Sequential()

# First hidden layer
model4_nn.add(Dense(256, activation='relu', input_shape=(input_shape,)))
model4_nn.add(BatchNormalization())
model4_nn.add(Dropout(0.2))

# Second hidden layer
model4_nn.add(Dense(128, activation='relu'))
model4_nn.add(BatchNormalization())
model4_nn.add(Dropout(0.2))

# Output layer
model4_nn.add(Dense(num_classes, activation='softmax'))

# Compile the model
model4_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
In [41]:
results = model4_nn.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {results[1]:.4f}")
print(f"Test Precision: {results[2]:.4f}")
print(f"Test Recall: {results[3]:.4f}")
Test Accuracy: 0.6468
Test Precision: 0.7621
Test Recall: 0.5098
In [43]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict on the test set
y_pred_probs4 = model4_nn.predict(X_test)

# Convert one-hot predictions to class labels
y_pred4 = np.argmax(y_pred_probs4, axis=1)

# Convert one-hot true labels to class labels
y_true4= np.argmax(y_test, axis=1)
cm4 = confusion_matrix(y_true4, y_pred4)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm4, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
800/800 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step
No description has been provided for this image
In [48]:
# Build the model with 2 hidden layers
model5_nn = Sequential()

# First hidden layer
model5_nn.add(Dense(64, activation='relu', input_shape=(input_shape,)))
model5_nn.add(BatchNormalization())
model5_nn.add(Dropout(0.2))

# Second hidden layer
model5_nn.add(Dense(128, activation='relu'))
model5_nn.add(BatchNormalization())
model5_nn.add(Dropout(0.2))

# Output layer
model5_nn.add(Dense(num_classes, activation='softmax'))

# Compile the model
model5_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
In [49]:
import time
start_time = time.time()
history5 = model5_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=128,
    verbose=1,
    callbacks=[early_stopping],

)
end_time = time.time()  # Record the end time

total_time = end_time - start_time
print(f"Training completed in {total_time:.2f} seconds.")
Epoch 1/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 27s 6ms/step - Precision: 0.6216 - Recall: 0.3058 - accuracy: 0.4706 - loss: 1.4942 - val_Precision: 0.7343 - val_Recall: 0.4148 - val_accuracy: 0.5963 - val_loss: 1.1150
Epoch 2/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7062 - Recall: 0.3935 - accuracy: 0.5677 - loss: 1.1876 - val_Precision: 0.7353 - val_Recall: 0.4387 - val_accuracy: 0.6056 - val_loss: 1.0889
Epoch 3/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7132 - Recall: 0.4090 - accuracy: 0.5790 - loss: 1.1580 - val_Precision: 0.7356 - val_Recall: 0.4479 - val_accuracy: 0.6092 - val_loss: 1.0726
Epoch 4/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7204 - Recall: 0.4263 - accuracy: 0.5897 - loss: 1.1294 - val_Precision: 0.7428 - val_Recall: 0.4513 - val_accuracy: 0.6135 - val_loss: 1.0586
Epoch 5/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7252 - Recall: 0.4347 - accuracy: 0.5944 - loss: 1.1144 - val_Precision: 0.7410 - val_Recall: 0.4614 - val_accuracy: 0.6152 - val_loss: 1.0484
Epoch 6/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7244 - Recall: 0.4404 - accuracy: 0.5969 - loss: 1.1076 - val_Precision: 0.7486 - val_Recall: 0.4576 - val_accuracy: 0.6179 - val_loss: 1.0386
Epoch 7/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7252 - Recall: 0.4435 - accuracy: 0.5986 - loss: 1.1010 - val_Precision: 0.7469 - val_Recall: 0.4702 - val_accuracy: 0.6182 - val_loss: 1.0325
Epoch 8/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7297 - Recall: 0.4534 - accuracy: 0.6050 - loss: 1.0832 - val_Precision: 0.7425 - val_Recall: 0.4805 - val_accuracy: 0.6218 - val_loss: 1.0263
Epoch 9/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7273 - Recall: 0.4508 - accuracy: 0.6037 - loss: 1.0911 - val_Precision: 0.7504 - val_Recall: 0.4662 - val_accuracy: 0.6224 - val_loss: 1.0252
Epoch 10/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7318 - Recall: 0.4539 - accuracy: 0.6064 - loss: 1.0815 - val_Precision: 0.7458 - val_Recall: 0.4734 - val_accuracy: 0.6221 - val_loss: 1.0230
Epoch 11/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7281 - Recall: 0.4509 - accuracy: 0.6019 - loss: 1.0864 - val_Precision: 0.7483 - val_Recall: 0.4766 - val_accuracy: 0.6261 - val_loss: 1.0171
Epoch 12/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7315 - Recall: 0.4528 - accuracy: 0.6046 - loss: 1.0794 - val_Precision: 0.7506 - val_Recall: 0.4770 - val_accuracy: 0.6273 - val_loss: 1.0147
Epoch 13/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7316 - Recall: 0.4570 - accuracy: 0.6072 - loss: 1.0730 - val_Precision: 0.7498 - val_Recall: 0.4758 - val_accuracy: 0.6255 - val_loss: 1.0150
Epoch 14/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7333 - Recall: 0.4583 - accuracy: 0.6075 - loss: 1.0705 - val_Precision: 0.7423 - val_Recall: 0.4868 - val_accuracy: 0.6233 - val_loss: 1.0146
Epoch 15/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7327 - Recall: 0.4636 - accuracy: 0.6108 - loss: 1.0649 - val_Precision: 0.7478 - val_Recall: 0.4816 - val_accuracy: 0.6273 - val_loss: 1.0115
Epoch 16/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7341 - Recall: 0.4627 - accuracy: 0.6121 - loss: 1.0643 - val_Precision: 0.7512 - val_Recall: 0.4791 - val_accuracy: 0.6279 - val_loss: 1.0049
Epoch 17/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7340 - Recall: 0.4653 - accuracy: 0.6093 - loss: 1.0647 - val_Precision: 0.7465 - val_Recall: 0.4917 - val_accuracy: 0.6275 - val_loss: 1.0051
Epoch 18/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7341 - Recall: 0.4655 - accuracy: 0.6141 - loss: 1.0644 - val_Precision: 0.7454 - val_Recall: 0.4947 - val_accuracy: 0.6295 - val_loss: 1.0029
Epoch 19/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7326 - Recall: 0.4659 - accuracy: 0.6115 - loss: 1.0632 - val_Precision: 0.7471 - val_Recall: 0.4893 - val_accuracy: 0.6296 - val_loss: 1.0028
Epoch 20/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7341 - Recall: 0.4696 - accuracy: 0.6149 - loss: 1.0560 - val_Precision: 0.7503 - val_Recall: 0.4868 - val_accuracy: 0.6286 - val_loss: 1.0030
Epoch 21/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7310 - Recall: 0.4649 - accuracy: 0.6132 - loss: 1.0623 - val_Precision: 0.7540 - val_Recall: 0.4817 - val_accuracy: 0.6297 - val_loss: 1.0011
Epoch 22/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7366 - Recall: 0.4706 - accuracy: 0.6163 - loss: 1.0497 - val_Precision: 0.7542 - val_Recall: 0.4833 - val_accuracy: 0.6291 - val_loss: 0.9997
Epoch 23/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7345 - Recall: 0.4673 - accuracy: 0.6146 - loss: 1.0574 - val_Precision: 0.7547 - val_Recall: 0.4816 - val_accuracy: 0.6305 - val_loss: 0.9994
Epoch 24/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7376 - Recall: 0.4675 - accuracy: 0.6128 - loss: 1.0524 - val_Precision: 0.7468 - val_Recall: 0.4920 - val_accuracy: 0.6275 - val_loss: 0.9991
Epoch 25/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7354 - Recall: 0.4681 - accuracy: 0.6118 - loss: 1.0557 - val_Precision: 0.7531 - val_Recall: 0.4822 - val_accuracy: 0.6290 - val_loss: 0.9998
Epoch 26/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7365 - Recall: 0.4711 - accuracy: 0.6142 - loss: 1.0504 - val_Precision: 0.7544 - val_Recall: 0.4850 - val_accuracy: 0.6299 - val_loss: 0.9985
Epoch 27/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7389 - Recall: 0.4717 - accuracy: 0.6145 - loss: 1.0509 - val_Precision: 0.7458 - val_Recall: 0.4946 - val_accuracy: 0.6314 - val_loss: 0.9966
Epoch 28/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7355 - Recall: 0.4715 - accuracy: 0.6162 - loss: 1.0507 - val_Precision: 0.7522 - val_Recall: 0.4889 - val_accuracy: 0.6326 - val_loss: 0.9941
Epoch 29/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7334 - Recall: 0.4678 - accuracy: 0.6122 - loss: 1.0591 - val_Precision: 0.7526 - val_Recall: 0.4894 - val_accuracy: 0.6314 - val_loss: 0.9930
Epoch 30/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7370 - Recall: 0.4708 - accuracy: 0.6141 - loss: 1.0499 - val_Precision: 0.7562 - val_Recall: 0.4843 - val_accuracy: 0.6328 - val_loss: 0.9943
Epoch 31/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - Precision: 0.7367 - Recall: 0.4713 - accuracy: 0.6152 - loss: 1.0460 - val_Precision: 0.7548 - val_Recall: 0.4825 - val_accuracy: 0.6309 - val_loss: 0.9993
Epoch 32/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7349 - Recall: 0.4707 - accuracy: 0.6167 - loss: 1.0460 - val_Precision: 0.7493 - val_Recall: 0.4924 - val_accuracy: 0.6310 - val_loss: 0.9928
Epoch 33/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7400 - Recall: 0.4733 - accuracy: 0.6175 - loss: 1.0463 - val_Precision: 0.7506 - val_Recall: 0.4860 - val_accuracy: 0.6316 - val_loss: 0.9939
Epoch 34/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7386 - Recall: 0.4749 - accuracy: 0.6176 - loss: 1.0436 - val_Precision: 0.7534 - val_Recall: 0.4864 - val_accuracy: 0.6308 - val_loss: 0.9941
Epoch 35/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 9s 11ms/step - Precision: 0.7376 - Recall: 0.4730 - accuracy: 0.6166 - loss: 1.0476 - val_Precision: 0.7560 - val_Recall: 0.4833 - val_accuracy: 0.6299 - val_loss: 0.9963
Epoch 36/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7396 - Recall: 0.4735 - accuracy: 0.6168 - loss: 1.0457 - val_Precision: 0.7447 - val_Recall: 0.4989 - val_accuracy: 0.6314 - val_loss: 0.9935
Epoch 37/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7372 - Recall: 0.4747 - accuracy: 0.6169 - loss: 1.0430 - val_Precision: 0.7560 - val_Recall: 0.4839 - val_accuracy: 0.6328 - val_loss: 0.9910
Epoch 38/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 9s 11ms/step - Precision: 0.7392 - Recall: 0.4744 - accuracy: 0.6181 - loss: 1.0434 - val_Precision: 0.7551 - val_Recall: 0.4824 - val_accuracy: 0.6340 - val_loss: 0.9940
Epoch 39/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 9ms/step - Precision: 0.7357 - Recall: 0.4710 - accuracy: 0.6154 - loss: 1.0438 - val_Precision: 0.7536 - val_Recall: 0.4878 - val_accuracy: 0.6322 - val_loss: 0.9901
Epoch 40/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 10ms/step - Precision: 0.7380 - Recall: 0.4783 - accuracy: 0.6199 - loss: 1.0401 - val_Precision: 0.7512 - val_Recall: 0.4888 - val_accuracy: 0.6310 - val_loss: 0.9946
Epoch 41/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7367 - Recall: 0.4688 - accuracy: 0.6153 - loss: 1.0484 - val_Precision: 0.7514 - val_Recall: 0.4920 - val_accuracy: 0.6325 - val_loss: 0.9920
Epoch 42/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7373 - Recall: 0.4715 - accuracy: 0.6182 - loss: 1.0408 - val_Precision: 0.7526 - val_Recall: 0.4904 - val_accuracy: 0.6329 - val_loss: 0.9927
Epoch 43/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7378 - Recall: 0.4736 - accuracy: 0.6166 - loss: 1.0449 - val_Precision: 0.7500 - val_Recall: 0.4960 - val_accuracy: 0.6325 - val_loss: 0.9875
Epoch 44/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7375 - Recall: 0.4733 - accuracy: 0.6145 - loss: 1.0417 - val_Precision: 0.7560 - val_Recall: 0.4893 - val_accuracy: 0.6343 - val_loss: 0.9901
Epoch 45/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7410 - Recall: 0.4758 - accuracy: 0.6193 - loss: 1.0356 - val_Precision: 0.7511 - val_Recall: 0.4964 - val_accuracy: 0.6331 - val_loss: 0.9880
Epoch 46/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - Precision: 0.7390 - Recall: 0.4777 - accuracy: 0.6211 - loss: 1.0444 - val_Precision: 0.7537 - val_Recall: 0.4934 - val_accuracy: 0.6330 - val_loss: 0.9911
Epoch 47/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 8ms/step - Precision: 0.7412 - Recall: 0.4787 - accuracy: 0.6192 - loss: 1.0378 - val_Precision: 0.7586 - val_Recall: 0.4893 - val_accuracy: 0.6351 - val_loss: 0.9865
Epoch 48/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 9ms/step - Precision: 0.7401 - Recall: 0.4760 - accuracy: 0.6196 - loss: 1.0434 - val_Precision: 0.7539 - val_Recall: 0.4896 - val_accuracy: 0.6339 - val_loss: 0.9906
Epoch 49/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - Precision: 0.7428 - Recall: 0.4782 - accuracy: 0.6211 - loss: 1.0363 - val_Precision: 0.7536 - val_Recall: 0.4950 - val_accuracy: 0.6347 - val_loss: 0.9868
Epoch 50/50
800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 7ms/step - Precision: 0.7398 - Recall: 0.4778 - accuracy: 0.6214 - loss: 1.0395 - val_Precision: 0.7546 - val_Recall: 0.4918 - val_accuracy: 0.6343 - val_loss: 0.9879
Training completed in 300.60 seconds.
In [50]:
results = model5_nn.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {results[1]:.4f}")
print(f"Test Precision: {results[2]:.4f}")
print(f"Test Recall: {results[3]:.4f}")
Test Accuracy: 0.6351
Test Precision: 0.7586
Test Recall: 0.4893
In [51]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict on the test set
y_pred_probs5 = model4_nn.predict(X_test)

# Convert one-hot predictions to class labels
y_pred5 = np.argmax(y_pred_probs4, axis=1)

# Convert one-hot true labels to class labels
y_true5= np.argmax(y_test, axis=1)
cm5 = confusion_matrix(y_true5, y_pred5)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm5, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step
No description has been provided for this image
In [53]:
from sklearn.metrics import classification_report


# For Model A
report_a = classification_report(y_true2, y_pred2, target_names=label_encoder.classes_)
print("Model A Classification Report:\n", report_a)

# For Model B
report_b = classification_report(y_true4, y_pred4, target_names=label_encoder.classes_)
print("Model B Classification Report with Undersampling:\n", report_b)
Model A Classification Report:
                   precision    recall  f1-score   support

  Children/Anime       0.71      0.55      0.62      3135
 Classical/Opera       0.76      0.83      0.79      3398
    Country/Folk       0.50      0.55      0.52      3086
Dance/Electronic       0.65      0.72      0.68      8477
 Hip-Hop/Rap/R&B       0.59      0.61      0.60      2845
      Jazz/Blues       0.53      0.51      0.52      3307
    Movie/Comedy       0.83      0.75      0.79      3495
        Pop/Rock       0.47      0.45      0.46      4321
World/Soundtrack       0.68      0.64      0.66      3291

        accuracy                           0.63     35355
       macro avg       0.64      0.62      0.63     35355
    weighted avg       0.64      0.63      0.63     35355

Model B Classification Report with Undersampling:
                   precision    recall  f1-score   support

  Children/Anime       0.76      0.60      0.67      2844
 Classical/Opera       0.75      0.85      0.79      2844
    Country/Folk       0.53      0.62      0.57      2845
Dance/Electronic       0.59      0.49      0.54      2845
 Hip-Hop/Rap/R&B       0.62      0.80      0.70      2845
      Jazz/Blues       0.58      0.58      0.58      2844
    Movie/Comedy       0.84      0.79      0.82      2845
        Pop/Rock       0.48      0.43      0.45      2844
World/Soundtrack       0.69      0.65      0.67      2844

        accuracy                           0.65     25600
       macro avg       0.65      0.65      0.64     25600
    weighted avg       0.65      0.65      0.64     25600

Performance Summary (Test Set) for model 4(2 hidden layers with 256 and 128 units):
Accuracy: 64.68%
Precision: 76.21%
Recall: 50.98%
These results mark a notable improvement in recall, while accuracy and precision slightly exceeded when compared to previous models. The higher recall indicates the model became more sensitive to identifying correct genre labels across all classes, likely due to the balanced class distribution during training. Undersampling the data led to better generalization across all genre classes, especially for those with the fewer samples for the class.
Even the confusion matrix shows predictions are more evenly distributed across genre classes, with less dominance by majority class Dance/Electronic.

Performance Summary (Test Set) for model 5(2 hidden layers with 64 and 128 units):
Accuracy: 63.52%
Precision: 75.86%
Recall: 48.93%
Compared to Model 4, Model 5 demonstrated a slight decline in all performance metrics. This suggests that reducing the capacity of the first hidden layer to 64 units may have limited the model’s ability to effectively learn complex feature patterns, resulting in lower generalization performance.

Among all models evaluated, the neural network trained on balanced (undersampled) data with two hidden layers of sizes 256 and 128 (Model 4) achieved the best overall results. This makes it the most effective architecture for multiclass genre classification in this project.

In [57]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
y_true = y_test
n_classes = y_true.shape[1]

fpr = dict()
tpr = dict()
roc_auc = dict()

for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred_probs4[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
plt.figure(figsize=(10, 8))
colors = ['aqua', 'darkorange', 'cornflowerblue', 'green', 'red',
          'purple', 'brown', 'pink', 'grey', 'olive', 'gold', 'black']

for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f'ROC curve for {label_encoder.classes_[i]} (area = {roc_auc[i]:0.2f})')

plt.plot([0, 1], [0, 1], 'k--', lw=2)  # Diagonal line

plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('AUC-ROC Curves per Species')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
No description has been provided for this image

From the above plot, the AUC (Area Under Curve) values range from 0.89 to 0.98, which indicates that the model performs very well across all genre classes.

Classical/Opera and Movie/Comedy achieved the highest AUC values of 0.98, suggesting that the model is highly effective in distinguishing these genres from the others. The lowest AUC was for Pop/Rock (0.89), indicating that this genre is the most challenging to classify.

Conventional Methods¶

In [135]:
df_multi = X.copy()
y_multi = spotify_df['genre_grouped']
X_train, X_test, y_train, y_test = train_test_split(
    df_multi, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)

Decision Trees¶

In [138]:
cv = KFold(n_splits = 5, shuffle = True, random_state = 5322)
param_grid = {
    'max_depth': [3, 5, 6, X_train.shape[1]],
    'min_samples_split': [2, 3, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': [None, 'balanced']
}

multi_dt = DecisionTreeClassifier(random_state = 5322)
grid_search = GridSearchCV(
    estimator = multi_dt,
    param_grid = param_grid,
    cv = cv,
    scoring = 'f1_weighted',
    n_jobs = -1,
    verbose = 1
)

start_time = time.time()
grid_search.fit(X_train, y_train)
end_time = time.time()
training_duration = end_time - start_time
print(f"Training time (Decision Tree with GridSearchCV): {training_duration:.2f} seconds")
Fitting 5 folds for each of 96 candidates, totalling 480 fits
Training time (Decision Tree with GridSearchCV): 56.89 seconds
In [140]:
grid_search.best_params_
Out[140]:
{'class_weight': None,
 'max_depth': 11,
 'min_samples_leaf': 4,
 'min_samples_split': 2}
In [142]:
best_multi_dt = grid_search.best_estimator_

binary_multi_pred = best_multi_dt.predict(X_test)
print(classification_report(y_test, binary_multi_pred))
                  precision    recall  f1-score   support

  Children/Anime       0.64      0.49      0.56      3135
 Classical/Opera       0.74      0.78      0.76      3398
    Country/Folk       0.43      0.52      0.47      3086
Dance/Electronic       0.60      0.65      0.62      8477
 Hip-Hop/Rap/R&B       0.58      0.55      0.56      2845
      Jazz/Blues       0.45      0.38      0.41      3307
    Movie/Comedy       0.81      0.72      0.76      3495
        Pop/Rock       0.42      0.45      0.44      4321
World/Soundtrack       0.62      0.58      0.60      3291

        accuracy                           0.58     35355
       macro avg       0.59      0.57      0.58     35355
    weighted avg       0.59      0.58      0.58     35355

In [144]:
class_names = [
    "Children/Anime",
    "Classical/Opera",
    "Country/Folk",
    "Dance/Electronic",
    "Hip-Hop/Rap/R&B",
    "Jazz/Blues",
    "Movie/Comedy",
    "Pop/Rock",
    "World/Soundtrack"
]
cm_dt = confusion_matrix(y_test, binary_multi_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_dt, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
No description has been provided for this image
In [146]:
# Plotting the variable importance for boosting model
feature_importance = best_multi_dt.feature_importances_*100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending = True,inplace=False)
rel_imp.tail(10).T.plot(kind='barh')
plt.xlabel('Variable importance')
plt.title('Variable importance for Decision Tree model')
Out[146]:
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
No description has been provided for this image
In [148]:
plt.figure(figsize=(50,50))
plot_tree(best_multi_dt
          , filled=True
          , feature_names=numeric_features
          , label='all'
          , fontsize=12)
plt.show()
No description has been provided for this image

The Decision Tree model was trained to classify tracks into one of nine grouped genres using musical features such as tempo, loudness, acousticness, and danceability. Hyperparameters 'max_depth', 'min_samples_split', 'min_samples_leaf' were optimized using a grid search with 5-fold cross-validation.

Best Model Parameters: max_depth: 11 min_samples_split: 2 min_samples_leaf: 4 class_weight: None

Overall Performance: Accuracy: 58%

Children/Anime - Of all the tracks the model labeled as Children/Anime, 64% were actually from this genre. However, it only managed to correctly identify 49% of all actual Children/Anime tracks. So it's fairly precise but misses many true ones.

Classical/Opera - Of all the tracks labeled Classical/Opera, 74% were correct. And it captured 78% of all true Classical/Opera tracks. This is one of the most reliable genres in terms of prediction.

Country/Folk - Only 43% of tracks predicted as Country/Folk were correct meaning it's often confused with other genres. It found 52% of actual Country/Folk tracks. Both precision and recall are low, indicating confusion with similar genres.

Dance/Electronic - When predicting Dance/Electronic, 60% of those predictions were correct, and it captured 65% of true instances. This is decent, showing good model understanding for this genre.

Hip-Hop/Rap/R&B - Of all the tracks predicted as Hip-Hop/Rap/R&B, 58% were correct, and it found 55% of the actual ones. Performance here is average, with moderate false positives and false negatives.

Jazz/Blues - Only 45% of the predicted Jazz/Blues tracks were truly from this genre, and just 38% of actual Jazz/Blues tracks were identified. This is one of the weakest genres, indicating heavy misclassification.

Movie/Comedy - 81% of the tracks labeled as Movie/Comedy were correct showing the model is very confident when it predicts this genre. It also identified 72% of all actual Movie/Comedy tracks. This is a very strong performing class.

Pop/Rock - Only 42% of the tracks predicted as Pop/Rock were correct, and the model found 45% of the true Pop/Rock tracks. Performance is weak, possibly due to similarity with Country or Dance genres.

World/Soundtrack - Of all tracks labeled World/Soundtrack, 62% were correct, and it captured 58% of actual ones. This is a moderately well-performing class, with some confusion likely with Classical or Movie genres.

Random Forest¶

In [154]:
# Random Forest

# Parameter grid for tuning
#param_grid = {
    #'n_estimators': [150, 200, 250],
    #'max_depth': [4,5,6],
    #'min_samples_split': [2, 5],
    #'min_samples_leaf': [1, 2],
    #'max_features': [0.5]
#}

#grid_search = GridSearchCV(
    #estimator=rf,
    #param_grid=param_grid,
    #cv=5,
    #scoring='f1_weighted',
    #n_jobs=-1,
   #verbose=1
#)
rf = RandomForestClassifier(n_estimators = 200, max_depth = 6, max_features=6, random_state=42)
start_time = time.time()
rf.fit(X_train, y_train)
end_time = time.time()
training_duration = end_time - start_time
print(f"Training time (RandomForest): {training_duration:.2f} seconds")
Training time (RandomForest): 38.42 seconds
In [170]:
y_pred_best = rf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred_best))
Classification Report:
                  precision    recall  f1-score   support

  Children/Anime       0.58      0.39      0.47      3135
 Classical/Opera       0.67      0.82      0.74      3398
    Country/Folk       0.42      0.38      0.40      3086
Dance/Electronic       0.50      0.74      0.60      8477
 Hip-Hop/Rap/R&B       0.58      0.48      0.52      2845
      Jazz/Blues       0.45      0.17      0.25      3307
    Movie/Comedy       0.90      0.62      0.73      3495
        Pop/Rock       0.40      0.47      0.43      4321
World/Soundtrack       0.62      0.50      0.55      3291

        accuracy                           0.54     35355
       macro avg       0.57      0.51      0.52     35355
    weighted avg       0.56      0.54      0.53     35355

In [172]:
feature_importance = rf.feature_importances_*100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending = True,inplace=False)
rel_imp.tail(10).T.plot(kind='barh')
plt.xlabel('Variable importance')
plt.title('Variable importance for Decision Tree model')
Out[172]:
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
No description has been provided for this image
In [174]:
cm_rf = confusion_matrix(y_test, y_pred_best)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_rf, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
No description has been provided for this image
In [176]:
class_names1 = [
    "1: Children/Anime",
    "2: Classical/Opera",
    "3: Country/Folk",
    "4: Dance/Electronic",
    "5: Hip-Hop/Rap/R&B",
    "6: Jazz/Blues",
    "7: Movie/Comedy",
    "8: Pop/Rock",
    "9: World/Soundtrack"
]
fig = plt.figure(figsize=(20, 15))
plot_tree(rf.estimators_[0], feature_names=numeric_features, class_names=class_names1, filled=True, rounded=True, fontsize = 5)
plt.title("Random Forest - Tree 0 (depth limited to 6)")
plt.show()
fig.savefig("rf_tree_depth4.pdf", bbox_inches='tight')
plt.close(fig)
No description has been provided for this image

Best Model Parameters: n_estimators = 200, max_depth = 6, max_features=6

Model Performance: Accuracy - 54%

Higher precision in Movie/Comedy (90%) and recall in Dance/Electronic (74%) and Classical/Opera (82%). This implies that when the model predicts a track as Movie/Comedy 9 out 10 times eventhough the dataset contains Dance/Electronic as the majority class. Similarly the model successfully retriueves most of the actual tracks belonging to the genre Dance/Electronic and Classical/Opera.

But overall, recall drops sharply in many genres especially Jazz/Blues.

Gradient Boosting¶

In [180]:
# Parameter grid for tuning
#param_grid = {
    #'n_estimators': [150, 200, 250],
    #'max_depth': [4,5,6],
    #'min_samples_split': [2, 5],
    #'min_samples_leaf': [1, 2],
    #'learning_rate': [0.01, 0.05, 0.1]
#}

#grid_search = GridSearchCV(
    #estimator=Tree_Bst_reg,
    #param_grid=param_grid,
    #cv=5,
    #scoring='f1_weighted',
    #n_jobs=-1,
   #verbose=1
#)
In [182]:
Tree_Bst_reg = GradientBoostingClassifier(n_estimators=250, learning_rate=0.1, max_depth=4, random_state=1)
start_time = time.time()
Tree_Bst_reg.fit(X_train,y_train)
end_time = time.time()
training_duration = end_time - start_time
print(f"Training time (GradientBoosting ): {training_duration:.2f} seconds")
Training time (GradientBoosting ): 881.50 seconds
In [186]:
y_pred_bag_reg = Tree_Bst_reg.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred_bag_reg))
Classification Report:
                  precision    recall  f1-score   support

  Children/Anime       0.71      0.55      0.62      3135
 Classical/Opera       0.78      0.82      0.80      3398
    Country/Folk       0.49      0.56      0.52      3086
Dance/Electronic       0.65      0.72      0.68      8477
 Hip-Hop/Rap/R&B       0.59      0.60      0.60      2845
      Jazz/Blues       0.54      0.46      0.50      3307
    Movie/Comedy       0.83      0.76      0.80      3495
        Pop/Rock       0.47      0.47      0.47      4321
World/Soundtrack       0.67      0.65      0.66      3291

        accuracy                           0.63     35355
       macro avg       0.64      0.62      0.63     35355
    weighted avg       0.64      0.63      0.63     35355

In [188]:
cm_gb = confusion_matrix(y_test, y_pred_bag_reg)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_gb, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
No description has been provided for this image
In [190]:
feature_importance = Tree_Bst_reg.feature_importances_*100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending = True,inplace=False)
rel_imp.tail(10).T.plot(kind='barh')
plt.xlabel('Variable importance')
plt.title('Variable importance for Decision Tree model')
Out[190]:
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
No description has been provided for this image
In [192]:
class_names = [
    "1: Children/Anime",
    "2: Classical/Opera",
    "3: Country/Folk",
    "4: Dance/Electronic",
    "5: Hip-Hop/Rap/R&B",
    "6: Jazz/Blues",
    "7: Movie/Comedy",
    "8: Pop/Rock",
    "9: World/Soundtrack"
]

plt.figure(figsize=(20, 10))
plot_tree(Tree_Bst_reg.estimators_[0,0], feature_names=numeric_features, class_names=class_names, filled=True, rounded=True, max_depth=4)
plt.title("Gradient Boosting - Tree 0")
plt.show()
No description has been provided for this image

Model Parameters: n_estimators=200 learning_rate=0.1 max_depth=4

Model Performance: Accuracy = 63%

Classical/Opera and Movie/Comedy which has a perfect balance between precision and recall, model is both accurate and consistent in identifying this genre.

Dance/Electronic: The interesting thing here is even though the model is exposed to a significantly higher number of Dance/Electronic tracks during training, it does not overfit by assigning this label to ambiguous tracks. Instead, it tries to balance class prediction, maintaining reasonable precision

This indicates the model is not biased towards the majority class.

Pop/Rock has both low precision (47%) and recall (47%) indicating this genre remains hard for the model to classify.

Jazz/Blues has one of the lowest recall scores (46%) many actual Jazz/Blues tracks are being misclassified.

Indicates this genre is still not well captured by the model.

Gradient Boosting achieves the best balance between precision and recall across all most all genres when compared to decision tree and random forest.

In [195]:
y_multibst = spotify_df['genre_grouped']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_multibst)
y_train_ds1 = label_encoder.fit_transform(y_train)
y_test_ds1 = label_encoder.fit_transform(y_test)
X_train, X_test, y_train, y_test = train_test_split(
    df_multi, y_encoded, test_size=0.2, random_state=42, stratify=y_multi
)
In [207]:
import xgboost as xgb

# Create the XGBoost classifier
xgb_clf = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=9,
    eval_metric='mlogloss',
    use_label_encoder=False,
    n_estimators=250,
    max_depth=5,
    learning_rate=0.1,
    random_state=42
)

# Train the model
start_time = time.time()
xgb_clf.fit(X_train, y_train)
y_pred_xgb = xgb_clf.predict(X_test)
end_time = time.time()
training_duration = end_time - start_time
print(f"Training time (XGBoost): {training_duration:.2f} seconds")

cm_xgb = confusion_matrix(y_test, y_pred_xgb)
disp = ConfusionMatrixDisplay(confusion_matrix=cm_xgb, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()

print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb))
/opt/anaconda3/lib/python3.12/site-packages/xgboost/training.py:183: UserWarning: [11:28:39] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:738: 
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Training time (XGBoost): 3.95 seconds
No description has been provided for this image
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.55      0.63      3135
           1       0.78      0.83      0.80      3398
           2       0.49      0.57      0.52      3086
           3       0.65      0.72      0.68      8477
           4       0.60      0.61      0.61      2845
           5       0.55      0.47      0.51      3307
           6       0.84      0.76      0.80      3495
           7       0.47      0.46      0.47      4321
           8       0.68      0.67      0.68      3291

    accuracy                           0.64     35355
   macro avg       0.64      0.63      0.63     35355
weighted avg       0.64      0.64      0.64     35355

In [209]:
feature_importance = xgb_clf.feature_importances_*100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending = True,inplace=False)
rel_imp.tail(10).T.plot(kind='barh')
plt.xlabel('Variable importance')
plt.title('Variable importance for Decision Tree model')
Out[209]:
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
No description has been provided for this image

KNN¶

In [85]:
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
    df_multi, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)
In [87]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, metric="euclidean", weights= "uniform", p=2)  # Euclidean distance
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print(classification_report(y_test, y_pred_knn))
              precision    recall  f1-score   support

           0       0.55      0.56      0.56      3135
           1       0.69      0.79      0.74      3398
           2       0.37      0.51      0.43      3086
           3       0.60      0.69      0.64      8477
           4       0.55      0.49      0.52      2845
           5       0.48      0.38      0.43      3307
           6       0.83      0.71      0.76      3495
           7       0.41      0.31      0.36      4321
           8       0.61      0.56      0.58      3291

    accuracy                           0.57     35355
   macro avg       0.57      0.56      0.56     35355
weighted avg       0.57      0.57      0.57     35355

In [89]:
from sklearn.decomposition import PCA, TruncatedSVD

svd = TruncatedSVD(n_components=df_multi.shape[1] - 1, random_state=42)
svd.fit(df_multi)
Out[89]:
TruncatedSVD(n_components=10, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
TruncatedSVD(n_components=10, random_state=42)
In [91]:
explained_var = np.cumsum(svd.explained_variance_ratio_)
plt.figure(figsize=(8, 4))
plt.plot(explained_var, marker='o')
plt.axhline(y=0.95, color='r', linestyle='--')
plt.title("Cumulative Explained Variance by SVD Components")
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Variance")
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [93]:
pca = PCA(n_components=7, random_state=42)
X_train_pca = pca.fit_transform(df_multi)
In [95]:
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
    X_train_pca, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)
In [97]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, metric="euclidean", weights= "uniform", p=2)  # Euclidean distance
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print(classification_report(y_test, y_pred_knn))
              precision    recall  f1-score   support

           0       0.55      0.56      0.56      3135
           1       0.69      0.79      0.74      3398
           2       0.37      0.51      0.43      3086
           3       0.60      0.69      0.64      8477
           4       0.55      0.49      0.52      2845
           5       0.48      0.38      0.43      3307
           6       0.83      0.71      0.76      3495
           7       0.41      0.31      0.36      4321
           8       0.61      0.56      0.58      3291

    accuracy                           0.57     35355
   macro avg       0.57      0.56      0.56     35355
weighted avg       0.57      0.57      0.57     35355

After applying PCA with 7 components training is done using KNN classifier. The accuracy dropped from 57% to 53% compared to the original KNN model (without PCA). This indicates the reduced feature space didn't preserve enough information for genre classification.

In [ ]: